#!/usr/bin/perl
#####
#
# Show bandwidth used for specified flows
# (c) 2009-2011 - JayKim <klathor.at.gmail.dot.com>
#
#####

=pod

=head1 NAME

ratesniff - Show bandwidth used for specified flows

=head1 SYNOPSIS

 ratesniff [options] "name1=regex1" "name2=regex2"...

 Options
    -h              Print usage and quit
    -m              Print full manpage and quit
    -V              Print version and quit
    -i <dev>        The device to pass to tcpdump, defaults to eth0
    -n <interval>   The seconds between rate dumps, defaults to 10
    -t              Print a timestamp at the end of each line
    -d              Print debug info, for testing regexes
    -C <command>    Override the tcpdump command
    -A <args>       Override the options passed to tcpdump
    -E <expression> Matching expression passed to tcpdump
    -b              Disable output buffering
    -r <scale>      Set displayed rate scale

=head1 DESCRIPTION

This program will take a series of named regexes and periodically print out the
bandwidth used for all matching flows. Regexes are applied to the output of 
C<'tcpdump -nnql -i eth0'> by default.

=cut

use warnings; 
use strict;

use Pod::Usage;
use Getopt::Std;
use FileHandle;
use POSIX qw(strftime);

use constant VERSION => '0.0.3';

# Matching regexes for different protocols
my %PROTOS = (tcp  => qr/\stcp\s(\d+)/,
              udp  => qr/\sUDP,\slength\s(\d+)/,
              icmp => qr/\sICMP\s.*\slength\s(\d+)/);

# Defaults
my %OPTS;
$OPTS{i} = 'eth0';      # Interface
$OPTS{n} = 10;          # Interval
$OPTS{C} = 'tcpdump';   # tcpdump command
$OPTS{A} = '-nnql';     # tcpdump args
$OPTS{E} = '';          # tcpdump filter expression
$OPTS{r} = 'a';         # Displayed rate mode

# Parse opts
getopts('hmVi:n:tdC:A:E:br:', \%OPTS) or pod2usage;
$OPTS{h} and pod2usage(1);
$OPTS{m} and pod2usage(-verbose => 2);
$OPTS{V} and 
	print "ratesniff v", VERSION, "\n" and exit;
$OPTS{r} =~ /^[aAkKmMgG]$/ or
    pod2usage("Rate scale must be one of a|A|k|K|m|M|g|G\n");
@ARGV or
    pod2usage("No regexes given.\n");
grep(!/=/, @ARGV) and
    pod2usage("Invalid argument.\n");
$OPTS{b} and $| = 1;

# Parse args
my %flows = map({/(.*?)=(.*)/ and $1 => qr/$2/} @ARGV);
my $num_flows = @ARGV;

# Call tcpdump
my $dumpout = new FileHandle "$OPTS{C} $OPTS{A} -i $OPTS{i} $OPTS{E}|" or
    die("Unable to run tcpdump");

# Set formats
my @headers = sort(keys(%flows));
my @fields;
my $time = '';
if ($OPTS{t}) {
    $time = "  @|||||||||||||||";
    push(@headers, "Time");
}
my $format = "format STDOUT_TOP =\n".
    '@>>>>>>>>>.... ' x $num_flows . "$time\n".
    "\@headers\n".
    ".\n";
eval $format;
$format = "format STDOUT =\n".
    '@####.## @>>>> ' x $num_flows . "$time\n".
    "\@fields\n".
    ".\n";
eval $format;

# Initialize counters
my %size;
# Fancy way to set all sizes to 0... I was bored...
@size{keys(%flows)} = unpack("C*", "\000" x $num_flows);
my $last = time;

# Read from tcpdump forever!
print "Reading from tcpdump...\n";
while (chomp(my $input = $dumpout->getline())) { 
    print STDERR "Got: '$input'\n" if $OPTS{d};

    # Match each filter, multiple filters could match
    for my $flow (keys(%flows)) {
        $input =~ /$flows{$flow}/ or next;
        print STDERR "Matched: $flow" if $OPTS{d};

        my $bytes;
        for my $proto (keys(%PROTOS)) {
            if (($bytes) = $input =~ /$PROTOS{$proto}/) {
                print STDERR ", proto $proto, len $bytes" if $OPTS{d};
                last;
            }
        }
        print STDERR "\n" if $OPTS{d};
        $size{$flow} += $bytes if $bytes;
    }

    # Skip the rest unless we reach the interval
    my $now = time;
    next if $now-$last < $OPTS{n};

    # Make up some rates
    @fields = ();
    for (sort(keys(%flows))) {
        # Start with KiB/s
        my $rate = $size{$_}/(1024*($now-$last));
        my $unit = "KiB/s"; 

        # Want bits or bytes?
        if ($OPTS{r} =~ /^[akmg]$/) {
            # Want bits, convert
            $rate *= 8;
            $unit ="Kib/s";

            # Start scaling
            if (($OPTS{r} eq 'a' and $rate > 1000) or
                ($OPTS{r} =~ /^[mg]$/)) {
                $rate /= 1024; 
                $unit ="Mib/s";
            } 
            if (($OPTS{r} eq 'a' and $rate > 1000) or
                ($OPTS{r} =~ /^[g]$/)) {
                $rate /= 1024; 
                $unit ="Gib/s";
            } 
        } else {
            # Stay with bytes, start scaling
            if (($OPTS{r} eq 'A' and $rate > 1000) or
                ($OPTS{r} =~ /^[MG]$/)) {
                $rate /= 1024; 
                $unit ="MiB/s";
            } 
            if (($OPTS{r} eq 'A' and $rate > 1000) or
                ($OPTS{r} =~ /^[G]$/)) {
                $rate /= 1024; 
                $unit ="GiB/s";
            } 
        }
        push(@fields, $rate, $unit);
    }
    push(@fields, strftime("%b %e %T", localtime)) if $OPTS{t};
    write;

    # Reset counters
    @size{keys(%flows)} = unpack("C*", "\000" x $num_flows);
    $last = $now;
}

exit;

=pod

=head1 OPTIONS

=over

=item B<-h>

Print a brief help message and exit.

=item B<-m>

Print the full manpage and exit.

=item B<-V>

Print version and exit.

=item B<< -i <dev> >>

Pass the specified device to F<tcpdump>. Defaults to C<eth0>.

=item B<< -n <interval> >>

Wait <interval> seconds between rate dumps, defaults to 10 seconds.

=item B<-t>

Print a timestamp at the end of each line.

=item B<-d>

Turns on debugging output for testing regexes. All debug messages are piped to 
STDERR. Get ready for lots of output.

=item B<< -C <command> >>

Override the F<tcpdump> command. Defaults to 'tcpdump'. Maybe you want to 
specify a full path? Go nuts.

=item B<< -A <args> >>

Override the options passed to tcpdump. Defaults to '-nnql'. I would not touch
these unless you know what you are doing.

=item B<< -E <expression> >>

Matching expression passed to tcpdump. Empty by default. If you have lots of
traffic you do not care about you may want to try filtering it with this.

=item B<-b>

Disable output buffering. Useful for getting realtime updates when redirecting 
output.

=item B<< -r <scale> >>

Set displayed rate scale. <scale> can be one of:

   a - Autoscale in binary bits/second (Kib/s, Mib/s, Gib/s)
   A - Autoscale in binary bytes/second (KiB/s, MiB/s, GiB/s)
   k - Force rates in kibibits/second (Kib/s)
   m - Force rates in mebibits/second (Mib/s)
   g - Force rates in gibibits/second (Gib/s)
   K - Force rates in kibibytes/second (KiB/s)
   M - Force rates in mebibytes/second (MiB/s)
   G - Force rates in gibibytes/second (GiB/s)

I know. I hate these prefixes too, but I rather be specific.

=back

=head1 REGEXES

All other arguments are assumed to be named regexes. Regex format is:

<name>=<regex>

That is: a short name, followed by an equal sign, followed by a perl regular
expression. The regex can be anything perl considers valid. Turn on debugging
mode to see the output of F<tcpdump> for examples of what to match. B<Make sure
the entire arg is quoted if necessary.>

=head1 BUGS AND DISCLAIMERS

If tcpdump reports packets 'dropped by kernel', you cannot count them with this
script either. 

This script only counts the user data bytes as reported by tcpdump. Packet
headers are not counted, nor are protocols other than tcp/udp/icmp (for now).

Even if packets are not dropped there are no guarantees that the output of this 
script is even close to accurate. Do not believe the output. Do not use the 
output for anything important. Above all, do not blame me if you ignore what 
I just said.

=cut
